import json
import random

# Draw a random subset of records from a JSON dataset and write it to a new
# JSON file.
input_file = r'/data/PaddlePaddle/translation2019zh_valid(39k).json'
output_file = r'/data/PaddlePaddle/valid_3k.json'

# Number of records to draw. Kept in one place so that the sampling call and
# the success message below cannot drift apart.
sample_size = 3000

# Load the whole source file into memory.
# NOTE(review): json.load expects the file to be a single JSON document
# (presumably a top-level array of records); a line-delimited file would
# fail here -- confirm the input format.
with open(input_file, 'r', encoding='utf-8') as f:
    data = json.load(f)

# random.sample raises ValueError when asked for more items than exist;
# check up front so the error says which file is too small.
if len(data) < sample_size:
    raise ValueError(
        f"Cannot sample {sample_size} records: {input_file} contains only "
        f"{len(data)}."
    )

# Sample without replacement, so no record is selected twice.
random_data = random.sample(data, sample_size)

# Write the subset out; ensure_ascii=False keeps non-ASCII text readable
# in the output instead of emitting \uXXXX escapes.
with open(output_file, 'w', encoding='utf-8') as f:
    json.dump(random_data, f, ensure_ascii=False, indent=4)

# Report the number of records actually written rather than a literal.
print(f"Successfully saved {len(random_data)} random pairs to {output_file}.")
